#load package
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.1.8
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(socviz)
##review data
# Print five randomly chosen rows, restricted to the columns used in the
# plots below. slice_sample() replaces the superseded sample_n().
election |>
  select(state, total_vote, r_points, pct_trump, party, census) |>
  slice_sample(n = 5)
## # A tibble: 5 × 6
##   state    total_vote r_points pct_trump party      census   
##   <chr>         <dbl>    <dbl>     <dbl> <chr>      <chr>    
## 1 Michigan    4824542    0.220      47.2 Republican Midwest  
## 2 Oregon      2001336  -11.0        39.1 Democratic West     
## 3 Alaska       318608   14.7        51.3 Republican West     
## 4 New York    7721795  -22.5        36.5 Democratic Northeast
## 5 Oklahoma    1452992   36.4        65.3 Republican South
# Two fill/colour values for the `party` variable; with an unnamed vector the
# manual scale assigns them in the order of the party values.
party_colors <- c("#2E74C0", "#CB454A")

# Dot plot of r_points per state, with states ordered by r_points.
# Rows whose `st` is "DC" are dropped with the %nin% (not-in) operator.
p0 <- ggplot(
  data = subset(election, st %nin% "DC"),
  mapping = aes(
    x = r_points,
    y = reorder(state, r_points),
    color = party
  )
)

# Reference line at a zero margin, drawn underneath the points.
p1 <- p0 +
  geom_vline(xintercept = 0, color = "gray30") +
  geom_point(size = 2)
p2 <- p1 + scale_color_manual(values = party_colors)

# Label both directions with positive numbers and name the leading candidate
# at the two extremes of the axis.
p3 <- p2 + scale_x_continuous(
  breaks = c(-30, -20, -10, 0, 10, 20, 30, 40),
  labels = c(
    "30\n (Clinton)", "20", "10", "0",
    "10", "20", "30", "40\n(Trump)"
  )
)

# One panel per census region, each with its own set of states on the y axis.
p3 +
  facet_wrap(~ census, ncol = 1, scales = "free_y") +
  # guides(<scale> = FALSE) is deprecated since ggplot2 3.3.4; "none" is the
  # supported way to hide the legend.
  guides(color = "none") +
  labs(x = "Point Margin", y = "") +
  theme(axis.text = element_text(size = 8))

# Install and preview maps

#install.packages("maps")
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
# Load the "state" map as a data frame of polygon vertices and show its
# first six rows.
us_states <- map_data(map = "state")
head(us_states, n = 6L)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>
# Outline map: every state polygon filled white with a black border.
# `group` keeps the vertices of each polygon together.
p <- ggplot(us_states, aes(x = long, y = lat, group = group))
p + geom_polygon(color = "black", fill = "white")

# Colour the map: fill each polygon by its `region` value and hide the
# fill legend.
p <- ggplot(
  data = us_states,
  mapping = aes(
    x = long,
    y = lat,
    group = group,
    fill = region
  )
)
p +
  geom_polygon(color = "gray90", linewidth = 0.1) +
  # guides(<scale> = FALSE) is deprecated since ggplot2 3.3.4; use "none".
  guides(fill = "none")

library(mapproj) #require for coord_map
# Same region-filled map, now drawn with an Albers projection via coord_map().
p <- ggplot(
  data = us_states,
  mapping = aes(
    x = long,
    y = lat,
    group = group,
    fill = region
  )
)
p +
  # `linewidth` replaces `size` for line/border widths since ggplot2 3.4.0.
  geom_polygon(color = "gray90", linewidth = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  # guides(<scale> = FALSE) is deprecated since ggplot2 3.3.4; use "none".
  guides(fill = "none")

# Merge election and map datasets

# The map data stores state names in lower case in `region`, so derive a
# matching lower-case key from `state` before joining.
election <- election |>
  mutate(region = tolower(state))

# Keep every map vertex and attach the election columns of its state.
us_states_elec <- us_states |>
  left_join(election, by = join_by(region))
head(us_states_elec)
##        long      lat group order  region subregion   state st fips total_vote
## 1 -87.46201 30.38968     1     1 alabama      <NA> Alabama AL    1    2123372
## 2 -87.48493 30.37249     1     2 alabama      <NA> Alabama AL    1    2123372
## 3 -87.52503 30.37249     1     3 alabama      <NA> Alabama AL    1    2123372
## 4 -87.53076 30.33239     1     4 alabama      <NA> Alabama AL    1    2123372
## 5 -87.57087 30.32665     1     5 alabama      <NA> Alabama AL    1    2123372
## 6 -87.58806 30.32665     1     6 alabama      <NA> Alabama AL    1    2123372
##   vote_margin winner      party pct_margin r_points d_points pct_clinton
## 1      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 2      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 3      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 4      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 5      588708  Trump Republican     0.2773    27.72   -27.72       34.36
## 6      588708  Trump Republican     0.2773    27.72   -27.72       34.36
##   pct_trump pct_johnson pct_other clinton_vote trump_vote johnson_vote
## 1     62.08        2.09      1.46       729547    1318255        44467
## 2     62.08        2.09      1.46       729547    1318255        44467
## 3     62.08        2.09      1.46       729547    1318255        44467
## 4     62.08        2.09      1.46       729547    1318255        44467
## 5     62.08        2.09      1.46       729547    1318255        44467
## 6     62.08        2.09      1.46       729547    1318255        44467
##   other_vote ev_dem ev_rep ev_oth census
## 1      31103      0      9      0  South
## 2      31103      0      9      0  South
## 3      31103      0      9      0  South
## 4      31103      0      9      0  South
## 5      31103      0      9      0  South
## 6      31103      0      9      0  South

# Plot election data on a map

# Two fill values for the `party` variable; with an unnamed vector the manual
# scale assigns them in the order of the party values.
party_colors <- c("#2E74C0", "#CB454A")

# Choropleth of the merged map/election data, filled by `party`.
p0 <- ggplot(
  data = us_states_elec,
  mapping = aes(
    x = long,
    y = lat,
    group = group,
    fill = party
  )
)
p1 <- p0 +
  # `linewidth` replaces `size` for line/border widths since ggplot2 3.4.0.
  geom_polygon(color = "gray90", linewidth = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

# fill = NULL removes the legend title.
p2 <- p1 +
  scale_fill_manual(values = party_colors) +
  labs(title = "Election Results 2016", fill = NULL)
p2 + theme_map()

# Review county datasets

# Print five randomly chosen rows of the county polygon data.
# slice_sample() replaces the superseded sample_n().
county_map |>
  slice_sample(n = 5)
##        long        lat  order  hole piece            group    id
## 1  150909.0  299017.78 129260 FALSE     1 0500000US38091.1 38091
## 2  937623.0 -910823.99 103256 FALSE     1 0500000US29155.1 29155
## 3  322397.9 -362124.42 111994 FALSE     1 0500000US31177.1 31177
## 4 2199227.9   40672.03  33431 FALSE     1 0500000US09003.1 09003
## 5  551637.4 -943608.59  19423 FALSE     1 0500000US05015.1 05015
# Print five randomly chosen counties with their id, name, state and
# population-density bin. slice_sample() replaces the superseded sample_n().
county_data |>
  select(id, name, state, pop_dens) |>
  slice_sample(n = 5)
##      id              name state      pop_dens
## 1 27095 Mille Lacs County    MN [   10,   50)
## 2 40131     Rogers County    OK [  100,  500)
## 3 21017    Bourbon County    KY [   50,  100)
## 4 22065    Madison Parish    LA [   10,   50)
## 5 31051      Dixon County    NE [   10,   50)

#merge datasets

# Keep every county polygon vertex and attach the county-level attributes
# that share its `id`.
county_full <- county_map |>
  left_join(county_data, by = join_by(id))
head(county_full)
##      long      lat order  hole piece            group    id           name
## 1 1225889 -1275020     1 FALSE     1 0500000US01001.1 01001 Autauga County
## 2 1235324 -1274008     2 FALSE     1 0500000US01001.1 01001 Autauga County
## 3 1244873 -1272331     3 FALSE     1 0500000US01001.1 01001 Autauga County
## 4 1244129 -1267515     4 FALSE     1 0500000US01001.1 01001 Autauga County
## 5 1272010 -1262889     5 FALSE     1 0500000US01001.1 01001 Autauga County
## 6 1276797 -1295514     6 FALSE     1 0500000US01001.1 01001 Autauga County
##   state census_region      pop_dens   pop_dens4   pop_dens6   pct_black   pop
## 1    AL         South [   50,  100) [ 45,  118) [ 82,  215) [15.0,25.0) 55395
## 2    AL         South [   50,  100) [ 45,  118) [ 82,  215) [15.0,25.0) 55395
## 3    AL         South [   50,  100) [ 45,  118) [ 82,  215) [15.0,25.0) 55395
## 4    AL         South [   50,  100) [ 45,  118) [ 82,  215) [15.0,25.0) 55395
## 5    AL         South [   50,  100) [ 45,  118) [ 82,  215) [15.0,25.0) 55395
## 6    AL         South [   50,  100) [ 45,  118) [ 82,  215) [15.0,25.0) 55395
##   female white black travel_time land_area hh_income su_gun4 su_gun6 fips
## 1   51.5  78.1  18.4        26.2    594.44     53682 [11,54] [10,12) 1001
## 2   51.5  78.1  18.4        26.2    594.44     53682 [11,54] [10,12) 1001
## 3   51.5  78.1  18.4        26.2    594.44     53682 [11,54] [10,12) 1001
## 4   51.5  78.1  18.4        26.2    594.44     53682 [11,54] [10,12) 1001
## 5   51.5  78.1  18.4        26.2    594.44     53682 [11,54] [10,12) 1001
## 6   51.5  78.1  18.4        26.2    594.44     53682 [11,54] [10,12) 1001
##   votes_dem_2016 votes_gop_2016 total_votes_2016 per_dem_2016 per_gop_2016
## 1           5908          18110            24661    0.2395685    0.7343579
## 2           5908          18110            24661    0.2395685    0.7343579
## 3           5908          18110            24661    0.2395685    0.7343579
## 4           5908          18110            24661    0.2395685    0.7343579
## 5           5908          18110            24661    0.2395685    0.7343579
## 6           5908          18110            24661    0.2395685    0.7343579
##   diff_2016 per_dem_2012 per_gop_2012 diff_2012 winner partywinner16 winner12
## 1     12202    0.2657577    0.7263374     11012  Trump    Republican   Romney
## 2     12202    0.2657577    0.7263374     11012  Trump    Republican   Romney
## 3     12202    0.2657577    0.7263374     11012  Trump    Republican   Romney
## 4     12202    0.2657577    0.7263374     11012  Trump    Republican   Romney
## 5     12202    0.2657577    0.7263374     11012  Trump    Republican   Romney
## 6     12202    0.2657577    0.7263374     11012  Trump    Republican   Romney
##   partywinner12 flipped
## 1    Republican      No
## 2    Republican      No
## 3    Republican      No
## 4    Republican      No
## 5    Republican      No
## 6    Republican      No

#plotting population density

# County choropleth filled by the binned population density `pop_dens`.
p <- ggplot(
  data = county_full,
  mapping = aes(
    x = long,
    y = lat,
    fill = pop_dens,
    group = group
  )
)

# coord_equal() keeps one unit of x the same length as one unit of y.
# `linewidth` replaces `size` for line/border widths since ggplot2 3.4.0.
p1 <- p +
  geom_polygon(color = "gray90", linewidth = 0.05) +
  coord_equal()

# Replace the bin labels with readable ranges (one label per bin, in order).
p2 <- p1 + scale_fill_brewer(
  palette = "Blues",
  labels = c(
    "0-10", "10-50", "50-100", "100-500",
    "500-1,000", "1,000-5,000", ">5,000"
  )
)

# The legend title is split over two lines; the original text "per nsquare"
# had lost the backslash of the intended "\n" line break.
p2 +
  labs(fill = "Population per\nsquare mile") +
  theme_map() +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom")

# The population-per-square-mile ranges come from the bins of the "pop_dens" variable.
# If we need different ranges, we must reclassify the data before plotting the graph.

#Reference: [https://app.diagrams.net/]

##install

#install.packages("DiagrammeR")
library(DiagrammeR)


# Basic flowchart: grViz() renders the inline Graphviz DOT specification below.
# - tab1..tab4 are plain-text nodes (tab1-tab3 in blue). Each '@@n' label is a
#   placeholder paired with the numbered '[n]:' footnote line that follows the
#   closing brace of the graph.
# - The tab1 -> tab2 -> tab3 path is drawn with thick red "vee" arrows, while
#   tab2 -> tab4 uses the default edge style.
grViz(diagram = "digraph flowchart {
  tab1 [label = '@@1', fontname = arial, shape = plaintext, fontcolor = blue]
  tab2 [label = '@@2', fontname = arial, shape = plaintext, fontcolor = blue] 
  tab3 [label = '@@3', fontname = arial, shape = plaintext, fontcolor = blue]
  tab4 [label = '@@4', fontname = arial, shape = plaintext]
  
  tab1 -> tab2 -> tab3 [color = red, arrowhead = vee, arrowsize = 1, penwidth = 5]; 
  tab2 -> tab4
}
  
  [1]: 'Artefact collection in field' 
  [2]: 'Preliminary dating of artefacts (visual)'    
  [3]: 'Artefacts sent to lab for dating'    
  [4]: 'Artefacts put in storage'
  ")